Load the required libraries.
library(tidyverse)
library(sf)
library(here)
library(readxl)
library(scales)
library(DT)
library(brms)
library(tidybayes)
library(patchwork)
library(marginaleffects)
library(ggrepel)
library(scico)
library(ggdensity)
library(ggpubr)
library(units)
#library(ggsn)
Functions that we will use throughout the script
#labeller for years
year_labels <- c(1950:1963)
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
Function for counterfactual plots
plot_counterfactual <- function(model_data, model, population_denominator, outcome, grouping_var=NULL, re_formula,...){
#labeller for years
year_labels <- c(1950:1963)
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
summary <- {{model_data}} %>%
select(year, year2, y_num, acf_period, {{population_denominator}}, {{outcome}}, {{grouping_var}}) %>%
add_epred_draws({{model}}, re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, {{outcome}}) %>%
mutate(acf_period = "a. pre-acf"), re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#plot the intervention effect
p <- summary %>%
droplevels() %>%
ggplot() +
geom_ribbon(aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = counterfact %>% filter(year>=1956),
aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = counterfact %>% filter(year>=1956),
aes(y=.epred_inc, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred_inc, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = {{model_data}}, aes(y={{outcome}}, x=year2, shape=acf_period), size=2) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
theme_ggdist() +
scale_y_continuous(labels=comma, limits = c(0,NA)) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="", na.translate = F) +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="", na.translate = F) +
scale_shape_discrete(name="", na.translate = F) +
labs(
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA),
text = element_text(size=8),
axis.text.x = element_text(size=8, angle = 90, hjust=1, vjust=0.5),
legend.text = element_text(size=8)) +
guides(shape="none")
facet_vars <- vars(...)
if (length(facet_vars) != 0) {
p <- p + facet_wrap(facet_vars)
}
p
}
Function for calculating measures of change over time (RR.peak, RR.level, RR.slope)
summarise_change <- function(model_data, model, population_denominator, grouping_var = NULL, re_formula = NULL) {
#functions for calculating RR.peak
#i.e. relative case notification rate in 1957 vs. counterfactual trend for 1957
grouping_var <- enquo(grouping_var)
if (!is.null({{grouping_var}})) {
#make the prediction matrix, conditional on whether we want random effects included or not.
out <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf")
)
} else {
out <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf")
)
}
peak_draws <- add_epred_draws(newdata = out,
object = {{model}},
re_formula = {{re_formula}}) %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
mutate(measure = "RR.peak")
peak_summary <- peak_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.peak")
#functions for calculating RR.level
#i.e. relative case notification rate in 1958 vs. counterfactual trend for 1958
if (!is.null({{grouping_var}})) {
out2 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf")
)
} else {
out2 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf")
)
}
level_draws <- add_epred_draws(newdata = out2,
object = {{model}},
re_formula = {{re_formula}}) %>%
arrange(y_num, .draw) %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
mutate(measure = "RR.level")
level_summary <- level_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.level")
#functions for calculating RR.slope
#i.e. relative change in case notification rate in 1958-1963 vs. counterfactual trend for 1959-1963
if (!is.null({{grouping_var}})) {
out3 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf")
)
} else {
out3 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf")
)
}
slope_draws <- add_epred_draws(newdata = out3,
object = {{model}},
re_formula = {{re_formula}}) %>%
arrange(y_num) %>%
ungroup() %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, y_num, !!grouping_var) %>%
summarise(slope = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(slope)/first(slope)) %>%
mutate(measure = "RR.slope")
slope_summary <- slope_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.slope")
#gather all the results into a named list
lst(peak_draws=peak_draws, peak_summary=peak_summary,
level_draws=level_draws, level_summary=level_summary,
slope_draws=slope_draws, slope_summary=slope_summary)
}
Function for calculating difference from counterfactual
calculate_counterfactual <- function(model_data, model, population_denominator, grouping_var=NULL, re_formula=NA){
#effect vs. counterfactual
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc_counterf = .epred/{{population_denominator}}*100000, .epred_counterf=.epred) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, .draw, .epred_counterf, .epred_inc_counterf, {{grouping_var}})
#Calcuate case notification rate per draw, then summarise.
post_change <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, {{grouping_var}}, .draw, .epred, .epred_inc, {{grouping_var}})
#for the overall period
counterfact_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, .draw, .epred, {{grouping_var}}) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred_counterf = sum(.epred))
#Calcuate case notification rate per draw, then summarise.
post_change_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, {{grouping_var}}, .draw, .epred) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred = sum(.epred))
counter_post <-
left_join(counterfact, post_change) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf,
diff_inc100k = .epred_inc - .epred_inc_counterf,
rr_inc100k = .epred_inc/.epred_inc_counterf) %>%
group_by(year, {{grouping_var}}) %>%
mean_qi(cases_averted, pct_change, diff_inc100k, rr_inc100k) %>%
ungroup()
counter_post_overall <-
left_join(counterfact_overall, post_change_overall) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by({{grouping_var}}) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup()
lst(counter_post, counter_post_overall)
}
Function for tidying up counterfactuals (mostly for making nice tables)
tidy_counterfactuals <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"),
diff_inc = glue::glue("{diff_inc100k} ({diff_inc100k.lower} to {diff_inc100k.upper})"),
rr_inc = glue::glue("{rr_inc100k} ({rr_inc100k.lower} to {rr_inc100k.upper})"))
}
tidy_counterfactuals_overall <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"))
}
Import datasets for analysis
Make a map of Glasgow wards
glasgow_wards_1951 <- st_read(here("mapping/glasgow_wards_1951.geojson"))
Reading layer `glasgow_wards_1951' from data source
`/Users/petermacpherson/Dropbox/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/glasgow_wards_1951.geojson' using driver `GeoJSON'
Simple feature collection with 37 features and 3 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -4.393502 ymin: 55.77464 xmax: -4.070411 ymax: 55.92814
Geodetic CRS: WGS 84
#read in Scotland boundary
scotland <- st_read(here("mapping/Scotland_boundary/Scotland boundary.shp"))
Reading layer `Scotland boundary' from data source
`/Users/petermacpherson/Dropbox/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/Scotland_boundary/Scotland boundary.shp'
using driver `ESRI Shapefile'
Simple feature collection with 1 feature and 1 field
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 5513 ymin: 530249 xmax: 470332 ymax: 1220302
Projected CRS: OSGB36 / British National Grid
#make a bounding box for Glasgow
bbox <- st_bbox(glasgow_wards_1951) |> st_as_sfc()
#plot scotland with a bounding box around the City of Glasgow
scotland_with_bbox <- ggplot() +
geom_sf(data = scotland, fill="antiquewhite") +
geom_sf(data = bbox, colour = "#C60C30", fill="antiquewhite") +
theme_void() +
theme(panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "#EAF7FA", size = 0.3))
Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
Please use the `linewidth` argument instead.
#plot the wards
#note we tidy up some names to fit on map
glasgow_ward_map <- glasgow_wards_1951 %>%
mutate(ward = case_when(ward=="Shettleston and Tollcross" ~ "Shettleston and\nTollcross",
ward=="Partick (West)" ~ "Partick\n(West)",
ward=="Partick (East)" ~ "Partick\n(East)",
ward=="North Kelvin" ~ "North\nKelvin",
ward=="Kinning Park" ~ "Kinning\nPark",
TRUE ~ ward)) %>%
ggplot() +
geom_sf(aes(fill=division)) +
geom_sf_label(aes(label = ward), size=3, fill=NA, label.size = NA, colour="black") +
#scale_colour_identity() +
scale_fill_brewer(palette = "Set3", name="City of Glasgow Division") +
theme_grey() +
labs(x="",
y="",
fill="Division") +
theme(legend.position = "top",
panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "antiquewhite", size = 0.3),
panel.grid.major = element_line(color = "grey78")) +
guides(fill=guide_legend(title.position = "top", title.hjust = 0.5, title.theme = element_text(face="bold")))
#add the map of scotland as an inset
glasgow_ward_map + inset_element(scotland_with_bbox, 0.75, 0, 1, 0.4)
ggsave(here("figures/s1.png"), height=10, width = 12)
NA
NA
Calculate areas per geographical unit
sf_use_s2(FALSE) #https://github.com/r-spatial/sf/issues/1762
Spherical geometry (s2) switched off
glasgow_wards_1951 <- glasgow_wards_1951 %>%
mutate(area = st_area(glasgow_wards_1951))
glasgow_wards_1951$area_km <- units::set_units(glasgow_wards_1951$area, km^2)
Make division shape files, and calculate area (stopped working, need to fix!)
# glasgow_divisions_1951 <- glasgow_wards_1951 %>%
# group_by(division) %>%
# summarize(geometry = st_union(geometry)) %>%
# nngeo::st_remove_holes() %>%
# mutate(area = st_area(glasgow_divisions_1951))
#
# glasgow_divisions_1951$area_km <- units::set_units(glasgow_divisions_1951$area, km^2)
Load in the datasets for denonomiators, and check for consistency.
overall_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "overall_population")
overall_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
overall_pops <- overall_pops %>%
mutate(year2 = year+0.5)
Note, we have three population estimates:
(Population in shipping is estimated from the 1951 census, so is the same for most years)
First, plot the total population
overall_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2), alpha=0.5, colour = "mediumseagreen", fill="mediumseagreen") +
geom_point(aes(y=total_population, x=year2), colour = "mediumseagreen") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: total population",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
Now the population excluding institutionalised and shipping population
overall_pops %>%
ggplot() +
geom_area(aes(y=population_without_inst_ship, x=year2), alpha=0.5, colour = "purple", fill="purple") +
geom_point(aes(y=population_without_inst_ship, x=year2), colour = "purple") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: population excluding institutionalised and shipping",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
There are 5 Divisions containing 37 Wards in the Glasgow Corporation, with consistent boundaries over time.
#look-up table for divisions and wards
ward_lookup <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "divisions_wards")
ward_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "ward_population")
ward_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
ward_pops <- ward_pops %>%
mutate(year2 = year+0.5)
#Get the Division population
division_pops <- ward_pops %>%
group_by(division, year) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship, na.rm = TRUE),
institutions = sum(institutions, na.rm = TRUE),
shipping = sum(shipping, na.rm = TRUE),
total_population = sum(total_population, na.rm = TRUE))
`summarise()` has grouped output by 'division'. You can override using the `.groups` argument.
division_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Plot the overall population by Division and Ward
division_pops %>%
mutate(year2 = year+0.5) %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division), alpha=0.8) +
geom_point(aes(y=total_population, x=year2, colour=division)) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(division~.) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name = "") +
scale_colour_brewer(palette = "Set3", name = "") +
labs(
title = "Glasgow Corporation: total population by Division",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
ward_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division), alpha=0.8) +
geom_point(aes(y=total_population, x=year2, colour=division)) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(ward~., ncol=6) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name="Division") +
scale_colour_brewer(palette = "Set3", name = "Division") +
labs(
title = "Glasgow City: total population by Ward",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
ggsave(here("figures/s2.png"), height=10, width=12)
Approximately, how many person-years of follow-up do we have?
overall_pops %>%
ungroup() %>%
summarise(across(year, length, .names = "years"),
across(c(population_without_inst_ship, total_population), sum)) %>%
mutate(across(where(is.double), comma)) %>%
datatable()
NA
NA
Change in population by ward
ward_pops %>%
group_by(ward) %>%
summarise(pct_change_pop = (last(population_without_inst_ship) - first(population_without_inst_ship))/first(population_without_inst_ship)) %>%
mutate(pct_change_pop = percent(pct_change_pop)) %>%
arrange(pct_change_pop) %>%
datatable()
NA
NA
NA
Output population density by ward and divison for regression modelling
Wards first
(stopped working, need to fix)
# ward_covariates <- glasgow_wards_1951 %>%
# left_join(ward_pops) %>%
# mutate(people_per_km_sq = as.double(population_without_inst_ship/area_km))
#
# #plot it out
#
# ward_covariates %>%
# ggplot() +
# geom_sf(aes(fill=people_per_km_sq)) +
# facet_wrap(year~., ncol=7) +
# scale_fill_viridis_c(option="A") +
# theme(legend.position = "bottom",
# axis.text.x = element_text(angle = 45, hjust=1))
#
# ggsave(here("figures/ward_pop_density.png"), width=10)
#
# write_rds(ward_covariates, here("populations/ward_covariates.rds"))
Now divisions first
(stopped working, need to fix)
# division_covariates <- glasgow_divisions_1951 %>%
# left_join(division_pops) %>%
# mutate(people_per_km_sq = as.double(total_population/area_km))
#
# #plot it out
#
# division_covariates %>%
# ggplot() +
# geom_sf(aes(fill=people_per_km_sq)) +
# geom_sf_label(aes(label = division), size=3, fill=NA, label.size = NA, colour="black", family = "Segoe UI") +
# facet_wrap(year~., ncol=7) +
# scale_fill_viridis_c(option="G") +
# theme(legend.position = "bottom",
# axis.text.x = element_text(angle = 45, hjust=1))
#
# ggsave(here("figures/division_pop_density.png"), width=10)
#
# write_rds(division_covariates, here("populations/division_covariates.rds"))
age_sex <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "age_sex_population") %>%
pivot_longer(cols = c(male, female),
names_to = "sex")
#collapse down to smaller age groups to be manageable
age_sex <- age_sex %>%
ungroup() %>%
mutate(age = case_when(age == "0 to 4" ~ "00 to 04",
age == "5 to 9" ~ "05 to 14",
age == "10 to 14" ~ "05 to 14",
age == "15 to 19" ~ "15 to 24",
age == "20 to 24" ~ "15 to 24",
age == "25 to 29" ~ "25 to 34",
age == "30 to 34" ~ "25 to 34",
age == "35 to 39" ~ "35 to 44",
age == "40 to 44" ~ "35 to 44",
age == "45 to 49" ~ "45 to 59",
age == "50 to 54" ~ "45 to 59",
age == "55 to 59" ~ "45 to 59",
TRUE ~ "60 & up")) %>%
group_by(year, age, sex) %>%
mutate(value = sum(value)) %>%
ungroup()
m_age_sex <- lm(value ~ splines::ns(year, knots = 3)*age*sex, data = age_sex)
summary(m_age_sex)
Warning: essentially perfect fit: summary may be unreliable
Call:
lm(formula = value ~ splines::ns(year, knots = 3) * age * sex,
data = age_sex)
Residuals:
Min 1Q Median 3Q Max
-1.185e-10 0.000e+00 0.000e+00 0.000e+00 1.185e-10
Coefficients: (14 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.222e+04 2.040e-10 2.559e+14 <2e-16 ***
splines::ns(year, knots = 3)1 -8.043e+03 4.071e-10 -1.976e+13 <2e-16 ***
splines::ns(year, knots = 3)2 NA NA NA NA
age05 to 14 3.669e+04 2.499e-10 1.468e+14 <2e-16 ***
age15 to 24 -3.893e+03 2.499e-10 -1.558e+13 <2e-16 ***
age25 to 34 -3.996e+04 2.499e-10 -1.599e+14 <2e-16 ***
age35 to 44 -4.230e+04 2.499e-10 -1.693e+14 <2e-16 ***
age45 to 59 5.459e+04 2.356e-10 2.317e+14 <2e-16 ***
age60 & up 7.533e+04 2.204e-10 3.418e+14 <2e-16 ***
sexmale 3.374e+03 2.886e-10 1.169e+13 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14 -1.863e+03 4.985e-10 -3.737e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14 NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24 7.533e+04 4.985e-10 1.511e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24 NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34 1.325e+05 4.985e-10 2.658e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34 NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44 1.380e+05 4.985e-10 2.769e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44 NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59 3.474e+03 4.700e-10 7.390e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59 NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up -8.453e+04 4.397e-10 -1.923e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up NA NA NA NA
splines::ns(year, knots = 3)1:sexmale -1.994e+03 5.757e-10 -3.464e+12 <2e-16 ***
splines::ns(year, knots = 3)2:sexmale NA NA NA NA
age05 to 14:sexmale 1.053e+04 3.534e-10 2.980e+13 <2e-16 ***
age15 to 24:sexmale 2.352e+04 3.534e-10 6.656e+13 <2e-16 ***
age25 to 34:sexmale 1.355e+04 3.534e-10 3.833e+13 <2e-16 ***
age35 to 44:sexmale -1.727e+03 3.534e-10 -4.888e+12 <2e-16 ***
age45 to 59:sexmale 2.774e+03 3.332e-10 8.324e+12 <2e-16 ***
age60 & up:sexmale -7.761e+04 3.117e-10 -2.490e+14 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14:sexmale -2.049e+04 7.051e-10 -2.906e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24:sexmale -6.780e+04 7.051e-10 -9.616e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34:sexmale -3.804e+04 7.051e-10 -5.396e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44:sexmale -1.171e+04 7.051e-10 -1.661e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59:sexmale -3.473e+04 6.647e-10 -5.224e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up:sexmale 1.056e+05 6.218e-10 1.698e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up:sexmale NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.074e-11 on 44 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 6.006e+29 on 27 and 44 DF, p-value: < 2.2e-16
age_levels <- age_sex %>% select(age) %>% distinct() %>% pull()
age_sex_nd <-
crossing(
age=age_levels,
sex=c("male", "female"),
year = 1950:1963
)
pred_pops <- age_sex_nd %>% modelr::add_predictions(m_age_sex)
Warning: prediction from a rank-deficient fit may be misleading
pred_pops %>%
ggplot(aes(x=year, y=pred, colour=age)) +
geom_line() +
geom_point() +
facet_grid(sex~.) +
scale_y_continuous(labels = comma, limits = c(0, 125000))
#How well do they match up with our overall populations?
pred_pops %>%
group_by(year) %>%
summarise(sum_pred_pop = sum(pred)) %>%
right_join(overall_pops) %>%
select(year, sum_pred_pop, population_without_inst_ship, total_population) %>%
pivot_longer(cols = c(sum_pred_pop, population_without_inst_ship, total_population)) %>%
ggplot(aes(x=year, y=value, colour=name)) +
geom_point() +
scale_y_continuous(labels = comma, limits = c(800000, 1250000))
Joining with `by = join_by(year)`
pred_pops %>%
group_by(year, sex) %>%
summarise(sum = sum(pred)) %>%
group_by(year) %>%
mutate(sex_ratio = first(sum)/last(sum))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
What percentage of adults (15+ participated in the intervention in 1957)?
pred_pops %>%
ungroup() %>%
filter(year==1957) %>%
filter(age != "00 to 04",
age != "05 to 14") %>%
summarise(total_pop = sum(pred)) %>%
mutate(cxr_screened = 622349) %>%
mutate(pct_pop_cxr_screened = percent(cxr_screened/total_pop))
pred_pops %>%
ungroup() %>%
filter(year==1957) %>%
filter(age != "00 to 04",
age != "05 to 14") %>%
summarise(total_pop = sum(pred), .by=sex) %>%
mutate(cxr_screened = c(340474, 281875)) %>%
mutate(pct_pop_cxr_screened = percent(cxr_screened/total_pop))
NA
NA
Population pyramids
label_abs <- function(x) {
comma(abs(x))
}
pred_pops %>%
ungroup() %>%
group_by(year) %>%
mutate(year_pop = sum(pred),
age_sex_pct = percent(pred/year_pop, accuracy=0.1)) %>%
mutate(sex = case_when(sex=="male" ~ "Male",
sex=="female" ~ "Female")) %>%
ggplot(
aes(x = age, fill = sex,
y = ifelse(test = sex == "Female",yes = -pred, no = pred))) +
geom_bar(stat = "identity") +
geom_text(aes(label = age_sex_pct),
position= position_stack(vjust=0.5), colour="white", size=2.5) +
facet_wrap(year~., ncol=7) +
coord_flip() +
scale_y_continuous(labels = label_abs) +
scale_fill_manual(values = c("mediumseagreen", "purple"), name="") +
theme_ggdist() +
theme(axis.text.x = element_text(angle=90, hjust = 1, vjust=0.5),
legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="", y="")
ggsave(here("figures/s3.png"), width=10)
Saving 10 x 4.5 in image
Not perfect, but resonably good. But ahhhhh… the age groups don’t align with the case notification age groups! Come back to think about this later.
Import the tuberculosis cases dataset
Overall, by year.
cases_by_year <- read_xlsx("2023-11-28_glasgow-acf.xlsx", sheet = "by_year")
cases_by_year%>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
cases_by_year <- cases_by_year %>%
mutate(year2 = year+0.5)
Plot the overall number of case notified per year, by pulmonary and extra pulmonary classification.
cases_by_year %>%
select(-total_notifications, -year) %>%
pivot_longer(cols = c(pulmonary_notifications, `non-pulmonary_notifications`)) %>%
mutate(name = case_when(name == "pulmonary_notifications" ~ "Pulmonary TB",
name == "non-pulmonary_notifications" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
Read in the datasets and merge together.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
ward_sheets <- enframe(all_sheets) %>%
filter(grepl("by_ward", value)) %>%
pull(value)
cases_by_ward_sex_year <- map_df(ward_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_ward_sex_year %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Aggregate together to get cases by division
cases_by_division <- cases_by_ward_sex_year %>%
left_join(ward_lookup) %>%
group_by(division, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE))
Joining with `by = join_by(ward)``summarise()` has grouped output by 'division', 'year'. You can override using the `.groups` argument.
#shift year to midpoint
cases_by_division <- cases_by_division %>%
mutate(year2 = year+0.5) %>%
ungroup()
cases_by_division %>%
select(-year2) %>%
select(year, everything()) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
cases_by_division %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
cases_by_ward <- cases_by_ward_sex_year %>%
group_by(ward, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE)) %>%
ungroup()
`summarise()` has grouped output by 'ward', 'year'. You can override using the `.groups` argument.
cases_by_ward %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
select(year, everything()) %>%
datatable()
#shift year to midpoint
cases_by_ward <- cases_by_ward %>%
mutate(year2 = year+0.5)
cases_by_ward %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.8) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
As we don’t have denominators, we will just model the change in counts.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
age_sex_sheets <- enframe(all_sheets) %>%
filter(grepl("by_age_sex", value)) %>%
pull(value)
cases_by_age_sex <- map_df(age_sex_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_age_sex %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
Now calculate case notification rates per 100,000 population
Merge the notification and population denominator datasets together.
Here we need to include the whole population (with shipping and institutions) as they are included in the notifications.
overall_inc <- overall_pops %>%
left_join(cases_by_year)
Joining with `by = join_by(year, year2)`
overall_inc <- overall_inc %>%
mutate(inc_pulm_100k = pulmonary_notifications/total_population*100000,
inc_ep_100k = `non-pulmonary_notifications`/total_population*100000,
inc_100k = total_notifications/total_population*100000)
overall_inc %>%
select(year, inc_100k, inc_pulm_100k, inc_ep_100k) %>%
mutate_at(.vars = vars(inc_100k, inc_pulm_100k, inc_ep_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
overall_inc %>%
select(year2, inc_pulm_100k, inc_ep_100k) %>%
pivot_longer(cols = c(inc_pulm_100k, `inc_ep_100k`)) %>%
mutate(name = case_when(name == "inc_pulm_100k" ~ "Pulmonary TB",
name == "inc_ep_100k" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
division_inc <- division_pops %>%
left_join(cases_by_division)
Joining with `by = join_by(division, year)`
division_inc <- division_inc %>%
mutate(inc_100k = cases/total_population*100000)
division_inc %>%
select(year, division, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
division_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
Here we will filter out the institutions and harbour from the denominators, as we don’t have reliable population denominators for them.
ward_inc <- ward_pops %>%
left_join(cases_by_ward)
Joining with `by = join_by(ward, year, year2)`
ward_inc <- ward_inc %>%
mutate(inc_100k = cases/population_without_inst_ship*100000)
ward_inc %>%
select(year, ward, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
ward_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Incidence (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
NA
NA
On a map
st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k)) +
facet_wrap(year~., ncol = 7) +
scale_fill_viridis_c(name="Case notification rate (per 100,000)",
option = "A") +
theme_ggdist() +
theme(legend.position = "top",
legend.key.width = unit(2, "cm"),
panel.border = element_rect(colour = "grey78", fill=NA)) +
guides(fill=guide_colorbar(title.position = "top"))
Joining with `by = join_by(division, ward, ward_number)`
Import the TB mortality data.
First, overall deaths. Note that in the original reports, we have a pulmonary TB death rate per million for all years, and numbers of pulmonary TB deaths for each year apart from 1950.
#get the overall mortality sheets
deaths_sheets <- enframe(all_sheets) %>%
filter(grepl("deaths", value)) %>%
pull(value)
overall_deaths <- map_df(deaths_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
overall_deaths %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
NA
Plot the raw numbers of pulmonary deaths
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_deaths)) +
geom_line(colour = "#DE0D92") +
geom_point(colour = "#DE0D92") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
labs(y="Pulmonary TB deaths per year",
x = "Year",
title = "Numbers of pulmonary TB deaths",
subtitle = "Glasgow, 1950-1963",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: no data for 1950") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
NA
NA
Now the incidence of pulmonary TB death
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_death_rate_per_100k)) +
geom_line(colour = "#4D6CFA") +
geom_point(colour = "#4D6CFA") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(y="Annual incidence of death (per 100,000)",
x = "Year",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
ggsave(here("figures/s7.png"), width=10)
Saving 10 x 4.5 in image
Make Table 1 here, and save for publication.
overall_pops %>%
select(year, total_population) %>%
left_join(overall_inc %>%
select(year,
pulmonary_notifications, inc_pulm_100k,
`non-pulmonary_notifications`, inc_ep_100k,
total_notifications, inc_100k)) %>%
left_join(overall_deaths %>%
select(year,
pulmonary_deaths, pulmonary_death_rate_per_100k)) %>%
mutate(across(where(is.numeric) & !(year), ~round(., digits=1))) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.)))
Joining with `by = join_by(year)`Joining with `by = join_by(year)`
Prepare the datasets for modelling
mdata <- ward_inc %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup()
mdata_extrapulmonary <- ward_inc %>%
filter(tb_type=="Non-Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup() %>%
filter(year<=1961) #no data for 1962 and 1963
#scaffold for overall predictions
overall_scaffold <- mdata %>%
select(year, year2, y_num, acf_period, population_without_inst_ship, ward, cases) %>%
group_by(year, year2, y_num, acf_period) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship),
cases = sum(cases)) %>%
ungroup() %>%
mutate(inc_100k = cases/population_without_inst_ship*100000) %>%
left_join(mdata_extrapulmonary %>% group_by(year) %>%
summarise(cases_extrapulmonary = sum(cases))) %>%
mutate(inc_100k_extrapulmonary = cases_extrapulmonary/population_without_inst_ship*100000)
`summarise()` has grouped output by 'year', 'year2', 'y_num'. You can override using the `.groups` argument.Joining with `by = join_by(year)`
This models the case notification rate over time, with a step change for the intervention, and slope change after the intervention.
Work on the priors a bit. We will build up from less complex to more complex.
at the intercept, we expect somewhere around 2500. We will set the standard deviation to both 0.5 and 1 to check what it looks like
#
# c(prior(lognormal(7.600902, 0.5)), #log(2500) = 7.600902
# prior(lognormal(7.600902, 1))) %>%
# parse_dist() %>%
#
# ggplot(aes(y = prior, dist = .dist, args = .args)) +
# stat_halfeye(.width = c(.5, .95)) +
# scale_y_discrete(NULL, labels = str_c("lognormal(log(2000), ", c(0.5, 1), ")"),
# expand = expansion(add = 0.1)) +
# xlab(expression(exp(italic(p)(beta[0])))) +
# coord_cartesian(xlim = c(0,15000))
#
#
# prior(gamma(1, 0.01)) %>%
# parse_dist() %>%
# ggplot(aes(y=prior, dist = .dist, args = .args)) +
# stat_halfeye(.width = c(0.5, 0.95))
#
# #now fit to a model, and plot some prior realisations
#
# m_prior1 <- brm(
# cases ~ 0 + Intercept,
# family = negbinomial(),
# data = overall_scaffold,
# sample_prior = "only",
# prior = prior(normal(log(2000), 0.5), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape)
# )
#
# add_epred_draws(object=m_prior1,
# newdata = tibble(intercept=1)) %>%
# ggplot(aes(x=intercept, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(labels = comma)
Now try to add in a term for the effect of y_num. We anticpate that the number of cases will decline by about 1-5% per year. However, as we are pretty uncertain about this, we will just encode a weakly regularising prior to restrict the year size to sensible ranges.
#
#
# m_prior2 <- brm(
# cases ~ 0 + Intercept + y_num,
# family = negbinomial(),
# data = overall_scaffold,
# sample_prior = "only",
# prior = prior(normal(log(2000), 0.5), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b, coef = y_num)
# )
#
# add_epred_draws(object=m_prior2,
# newdata = overall_scaffold) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma)
Now we want to add in a prior for the effect of the acf_intervention. We anticipate the peak to be anywhere between no effect, and a tripling
#
# m_prior3 <- brm(
# cases ~ 0 + Intercept + y_num + acf_period,
# family = negbinomial(),
# data = overall_scaffold,
# sample_prior = "only",
# prior = prior(normal(log(2000), 0.5), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b, coef = y_num) +
# prior(normal(0, 0.001), class = b)
# )
#
#
# add_epred_draws(object=m_prior3,
# newdata = overall_scaffold) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(labels = comma)
#
Now we look and see what it looks like with the interactions
#
# m_prior4 <- brm(
# cases ~ 0 + Intercept + y_num + acf_period + y_num:acf_period,
# family = negbinomial(),
# data = overall_scaffold,
# sample_prior = "only",
# prior = prior(normal(log(2500), 1), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b)
# )
#
# add_epred_draws(object=m_prior4,
# newdata = overall_scaffold) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma)
#
#
Now try adding in the random intercepts
# c(prior(lognormal(3.912023, 0.5)), #log(50) = 3.912023
# prior(lognormal(3.912023, 1))) %>%
# parse_dist() %>%
#
# ggplot(aes(y = prior, dist = .dist, args = .args)) +
# stat_halfeye(.width = c(.5, .95)) +
# scale_y_discrete(NULL, labels = str_c("lognormal(log(50), ", c(0.5, 1), ")"),
# expand = expansion(add = 0.1)) +
# xlab(expression(exp(italic(p)(beta[0])))) +
# coord_cartesian(xlim = c(0,400))
#
#
# m_prior5 <- brm(
# cases ~ y_num + acf_period + y_num:acf_period + ( 1 | ward),
# family = negbinomial(),
# data = mdata,
# sample_prior = "only",
# prior = prior(normal(log(50), 1), class = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b) +
# prior(exponential(1), class=sd)
# )
#
#
# add_epred_draws(object=m_prior5,
# newdata = mdata,
# re_formula = NA) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma)
#
# add_epred_draws(object=m_prior5,
# newdata = mdata,
# re_formula = NA) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma) +
# facet_wrap(ward~.)
And add in the random slopes
#
# m_prior6 <- brm(
# cases ~ 1 + y_num + acf_period + y_num:acf_period + (1 + y_num*acf_period | ward),
# family = negbinomial(),
# data = mdata,
# sample_prior = "only",
# prior = prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.1), class = b) +
# prior(exponential(1), class=sd) +
# prior(lkj(2), class=cor)
# )
#
#
#
# m_prior6 <- brm(
# cases ~ 0 + Intercept + y_num + acf_period + y_num:acf_period + ( y_num*acf_period | ward),
# family = negbinomial(),
# data = mdata,
# sample_prior = "only",
# prior = prior(normal(log(50), 1), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b) +
# prior(exponential(100), class=sd) +
# prior(lkj(2), class=cor)
# )
# add_epred_draws(object=m_prior6,
# newdata = mdata,
# re_formula = NA) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma)
#
# add_epred_draws(object=m_prior6,
# newdata = mdata,
# re_formula = ~( 1 + y_num + acf_period | ward)) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma) +
# facet_wrap(ward~.)
#
# plot_counterfactual(model_data = overall_scaffold, model=m_prior6, outcome = inc_100k,
# population_denominator = population_without_inst_ship, re_formula = NA)
#
# plot_counterfactual(model_data = mdata, model=m_prior6, outcome = inc_100k,
# population_denominator = population_without_inst_ship, grouping_var = ward, ward,
# re_formula = ~( 1 + y_num + acf_period | ward))
Issue here is the non-centred parameterisation of the intercept prior… Feel like this is a more interpretable way to set priors… but will revert to centred parameterisation for the meantime.
# m_centered_prior <- brm(
# cases ~ 1 + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
# data = mdata,
# family = negbinomial(),
# seed = 1234,
# chains = 4, cores = 4,
# prior = prior(normal(0,1000), class = Intercept) +
# prior(gamma(0.01, 0.01), class = shape) +
# prior(normal(0, 1), class = b) +
# prior(exponential(1), class=sd) +
# prior(lkj(2), class=cor),
# sample_prior = "only")
#
# plot(m_centered_prior)
#
# plot_counterfactual(model_data = overall_scaffold, model=m_centered_prior, outcome = inc_100k,
# population_denominator = population_without_inst_ship, re_formula = NA)
#
# plot_counterfactual(model_data = mdata, model=m_centered_prior, outcome = inc_100k,
# population_denominator = population_without_inst_ship, grouping_var = ward, ward,
# re_formula = ~( 1 + y_num*acf_period | ward))
Look at the mean and variance of counts (counts of pulmonary notifications are what we are predicting)
#Mean of counts per year
mean(mdata$cases)
[1] 48.32819
#variance of counts per year
var(mdata$cases)
[1] 915.5749
Quite a bit of over-dispersion here, so negative binomial distribution might be a better choice of distributional family than Poisson.
Fit the model with the data
m_pulmonary <- brm(
cases ~ 1 + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0,1), class = Intercept) +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(4), class=cor),
control = list(adapt_delta = 0.9))
Compiling Stan program...
Start sampling
starting worker pid=12571 on localhost:11828 at 13:42:17.890
starting worker pid=12585 on localhost:11828 at 13:42:18.075
starting worker pid=12599 on localhost:11828 at 13:42:18.259
starting worker pid=12613 on localhost:11828 at 13:42:18.525
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000373 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 3.73 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 0.000272 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 2.72 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0.000325 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 3.25 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000295 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 2.95 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 51.007 seconds (Warm-up)
Chain 3: 38.225 seconds (Sampling)
Chain 3: 89.232 seconds (Total)
Chain 3:
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 51.724 seconds (Warm-up)
Chain 1: 38.144 seconds (Sampling)
Chain 1: 89.868 seconds (Total)
Chain 1:
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 52.983 seconds (Warm-up)
Chain 2: 37.984 seconds (Sampling)
Chain 2: 90.967 seconds (Total)
Chain 2:
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 54.775 seconds (Warm-up)
Chain 4: 38.094 seconds (Sampling)
Chain 4: 92.869 seconds (Total)
Chain 4:
#check model diagnostics
summary(m_pulmonary)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ 1 + y_num * acf_period + (1 + y_num * acf_period | ward) + offset(log(population_without_inst_ship))
Data: mdata (Number of observations: 518)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Group-Level Effects:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.25 0.03 0.19 0.32 1.00 1676 2545
sd(y_num) 0.02 0.01 0.01 0.03 1.00 910 767
sd(acf_periodb.acf) 0.06 0.04 0.00 0.16 1.00 1539 1541
sd(acf_periodc.postMacf) 0.11 0.06 0.01 0.24 1.00 792 1493
sd(y_num:acf_periodb.acf) 0.01 0.01 0.00 0.02 1.00 1534 1834
sd(y_num:acf_periodc.postMacf) 0.01 0.01 0.00 0.02 1.00 586 1243
cor(Intercept,y_num) -0.45 0.19 -0.75 -0.00 1.00 2105 1928
cor(Intercept,acf_periodb.acf) -0.23 0.29 -0.71 0.38 1.00 3192 2501
cor(y_num,acf_periodb.acf) -0.05 0.27 -0.55 0.49 1.00 5900 3316
cor(Intercept,acf_periodc.postMacf) -0.14 0.24 -0.58 0.37 1.00 3998 2877
cor(y_num,acf_periodc.postMacf) 0.10 0.26 -0.41 0.58 1.00 3393 3029
cor(acf_periodb.acf,acf_periodc.postMacf) 0.07 0.28 -0.47 0.58 1.00 2092 2824
cor(Intercept,y_num:acf_periodb.acf) -0.24 0.28 -0.72 0.36 1.00 3639 3063
cor(y_num,y_num:acf_periodb.acf) -0.05 0.27 -0.55 0.49 1.00 5688 3327
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.05 0.28 -0.58 0.49 1.00 4612 2835
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) 0.07 0.27 -0.46 0.57 1.00 3873 3333
cor(Intercept,y_num:acf_periodc.postMacf) -0.02 0.25 -0.50 0.47 1.00 5667 3341
cor(y_num,y_num:acf_periodc.postMacf) -0.03 0.28 -0.54 0.53 1.00 2983 2512
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.06 0.27 -0.48 0.56 1.00 2562 2754
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.07 0.29 -0.61 0.49 1.00 2466 2876
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.04 0.28 -0.49 0.57 1.00 2347 3061
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -6.14 0.05 -6.23 -6.05 1.00 1002 1503
y_num -0.02 0.01 -0.03 -0.01 1.00 3453 2947
acf_periodb.acf 0.03 0.99 -1.92 1.95 1.00 3857 3041
acf_periodc.postMacf 0.04 0.11 -0.16 0.25 1.00 4109 3148
y_num:acf_periodb.acf 0.08 0.12 -0.16 0.33 1.00 3855 3090
y_num:acf_periodc.postMacf -0.05 0.01 -0.07 -0.03 1.00 3998 2938
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 92.69 20.80 60.69 140.57 1.00 3627 2972
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_pulmonary)
pp_check(m_pulmonary, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
prior_summary(m_pulmonary)
prior class coef group resp dpar nlpar lb ub source
normal(0, 1) b user
normal(0, 1) b acf_periodb.acf (vectorized)
normal(0, 1) b acf_periodc.postMacf (vectorized)
normal(0, 1) b y_num (vectorized)
normal(0, 1) b y_num:acf_periodb.acf (vectorized)
normal(0, 1) b y_num:acf_periodc.postMacf (vectorized)
normal(0, 1) Intercept user
lkj_corr_cholesky(4) L user
lkj_corr_cholesky(4) L ward (vectorized)
exponential(1) sd 0 user
exponential(1) sd ward 0 (vectorized)
exponential(1) sd acf_periodb.acf ward 0 (vectorized)
exponential(1) sd acf_periodc.postMacf ward 0 (vectorized)
exponential(1) sd Intercept ward 0 (vectorized)
exponential(1) sd y_num ward 0 (vectorized)
exponential(1) sd y_num:acf_periodb.acf ward 0 (vectorized)
exponential(1) sd y_num:acf_periodc.postMacf ward 0 (vectorized)
gamma(0.01, 0.01) shape 0 user
Summarise the posterior in graphical form
f1b <- plot_counterfactual(model_data = overall_scaffold, model = m_pulmonary,
population_denominator = population_without_inst_ship, outcome = inc_100k, grouping_var=NULL,
re_formula = NA)
f1b
Make this into a figure combined with the map of empirical data
f1a <- st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k)) +
facet_wrap(year~., ncol = 7) +
scale_fill_viridis_c(name="Case notification rate (per 100,000)",
option = "A") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "top",
legend.key.width = unit(2, "cm"),
legend.title.align = 0.5,
text = element_text(size=8),
axis.text.x = element_text(angle=45, size=6, hjust=1),
axis.text.y = element_text(size=6)) +
guides(fill=guide_colorbar(title.position = "top"))
Joining with `by = join_by(division, ward, ward_number)`
(f1a / f1b) + plot_annotation(tag_levels = "A")
ggsave(here("figures/f1.png"))
Saving 7.29 x 4.5 in image
Summary of change in notifications numerically
overall_change <- summarise_change(model_data=overall_scaffold, model=m_pulmonary,
population_denominator=population_without_inst_ship, grouping_var=NULL, re_formula = NA)
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
overall_change %>%
keep((names(.) %in% tokeep)) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
Numbers of pulmonary TB cases averted compared to counterfactual per year.
overall_pulmonary_counterf <- calculate_counterfactual(model_data = overall_scaffold, model=m_pulmonary, population_denominator = population_without_inst_ship)
Joining with `by = join_by(year, population_without_inst_ship, .draw)`Joining with `by = join_by(.draw)`
overall_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Total pulmonary TB cases averted between 1958 and 1963
overall_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
What are the correlations between peak, level, and slope?
#RR.peak histogram
a <- overall_change$peak_draws %>%
ggplot() +
geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="RR.peak",
y="")
#RR. level histogram
b <- overall_change$level_draws %>%
ggplot() +
geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="RR.level",
y="")
#RR.slope histogram
c <- overall_change$slope_draws %>%
ggplot() +
geom_histogram(aes(x=estimate), fill="darkblue", colour="darkblue", alpha=0.3)+
scale_fill_gradient(high="lightblue1",low="darkblue") +
#scale_x_continuous(limits = c(0, 6)) +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="RR.slope",
y="")
#Correlation between RR.peak and RR.level
cor_rr_peak_rr_level <- round(cor(pluck(overall_change$peak_draws$estimate), pluck(overall_change$level_draws$estimate)), digits = 2)
#Correlation between RR.peak and RR.slope
cor_rr_peak_rr_slope <- round(cor(pluck(overall_change$peak_draws$estimate), pluck(overall_change$slope_draws$estimate)), digits = 2)
#Correlation between RR.level and RR.slope
cor_rr_level_rr_slope <- round(cor(pluck(overall_change$level_draws$estimate), pluck(overall_change$slope_draws$estimate)), digits = 2)
#plot of correlation between RR.peak and RR.level
d <- bind_cols(RR.peak=pluck(overall_change$peak_draws$estimate),
RR.level =pluck(overall_change$level_draws$estimate)) %>%
ggplot(aes(y=RR.peak, x = RR.level)) +
geom_hex() +
geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
geom_text(aes(y=2.2, x=0.58, label=cor_rr_peak_rr_level), colour="firebrick") +
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
#plot of correlation between RR.peak and RR.slope
e <- bind_cols(RR.peak=pluck(overall_change$peak_draws$estimate),
RR.slope =pluck(overall_change$slope_draws$estimate)) %>%
ggplot(aes(y=RR.peak, x = RR.slope)) +
geom_hex() +
geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
geom_text(aes(y=2.1, x=0.65, label=cor_rr_peak_rr_slope), colour="firebrick") +
#scale_x_continuous(limits = c(0, 6)) +
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
#plot of correlation between RR.level and RR.slope
f <- bind_cols(RR.level=pluck(overall_change$level_draws$estimate),
RR.slope =pluck(overall_change$slope_draws$estimate)) %>%
ggplot(aes(y=RR.level, x = RR.slope)) +
geom_hex() +
geom_smooth(se=FALSE, colour="firebrick", method = "lm") +
geom_text(aes(y=0.75, x=0.65, label=cor_rr_level_rr_slope), colour="firebrick") +
#scale_x_continuous(limits = c(0, 6)) +
scale_fill_gradient(high="lightblue1",low="darkblue") +
theme_ggdist() +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey78", fill=NA))
(plot_spacer() + plot_spacer() + c) /
(plot_spacer() + b + f) /
(a + d + e)
ggsave(here("figures/pulmonary_cors.pdf"), width=8, height=8)
NA
NA
NA
Plot the counterfactual at ward level
plot_counterfactual(model_data = mdata, model=m_pulmonary, outcome = inc_100k, population_denominator = population_without_inst_ship,
grouping_var = ward, ward, re_formula= ~(1 + y_num*acf_period | ward))
ggsave(here("figures/s3.png"), width=12, height=12)
Summary of change in notifications at ward level
ward_change <- summarise_change(model_data=mdata, model=m_pulmonary,
population_denominator=population_without_inst_ship, grouping_var=ward,
re_formula = ~(1 + y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw', 'y_num'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
ward_change %>%
keep((names(.) %in% tokeep)) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
#plot these in a figure
ward_effects <- ward_change %>%
keep((names(.) %in% tokeep)) %>%
bind_rows() %>%
bind_rows(overall_change$peak_summary) %>%
bind_rows(overall_change$level_summary) %>%
bind_rows(overall_change$slope_summary) %>%
mutate_at(.vars = vars(estimate:.upper),
.funs = funs(as.numeric)) %>%
select(measure, everything()) %>%
mutate(estimate = as.double(estimate)) %>%
full_join(glasgow_wards_1951) %>%
mutate(ward2 = paste0(ward_number, ". ", ward)) %>%
mutate(ward2 = case_when(is.na(ward) ~ "Overall",
TRUE ~ ward2)) %>%
st_as_sf()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))Joining with `by = join_by(ward)`
#function for plotting choropleth maps
plot_ward_effect <- function(data, measure){
{{data}} %>%
filter(measure == {{measure}}) %>%
ggplot() +
geom_sf(aes(fill=estimate)) +
geom_sf_label(aes(label = ward_number), size=3, fill=NA, label.size = NA, colour="black") +
scale_fill_gradient(high="lightblue1",low="darkblue", name="") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA),
axis.text.x=element_text(angle=45, hjust=1)) +
labs(x="", y="")
}
#function for plotting catapiller plots
plot_ward_cat <- function(data, measure, scale){
pal <- colorRampPalette(c('darkblue','lightblue'))
{{data}} %>%
filter(measure=={{measure}}) %>%
mutate(my_palette = case_when(ward2=="Overall" ~ "#C60C30",
TRUE ~ pal(36)[as.numeric(cut(.$estimate,breaks = 36))])) %>%
ggplot() +
geom_pointrange(aes(y=estimate, ymin=.lower, ymax=.upper,
x=fct_reorder(ward2, estimate), colour=my_palette)) +
coord_flip() +
scale_colour_identity(name="") +
scale_y_continuous() +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x = "",
y = "Relative rate (95% CrI)")
}
ward_peak_i <- plot_ward_effect(data = ward_effects, measure = "RR.peak")
ward_level_i <- plot_ward_effect(data = ward_effects, measure = "RR.level")
ward_slope_i <- plot_ward_effect(data = ward_effects, measure = "RR.slope")
ward_peak_ii <- plot_ward_cat(data = ward_effects, measure = "RR.peak")
ward_level_ii <- plot_ward_cat(data = ward_effects, measure = "RR.level")
ward_slope_ii <- plot_ward_cat(data = ward_effects, measure = "RR.slope")
s4 <- (ward_peak_i + ward_level_i + ward_slope_i) /
(ward_peak_ii + ward_level_ii + ward_slope_ii)
s4[[1]] <- s4[[1]] + plot_layout(tag_level = 'new')
s4[[2]] <- s4[[2]] + plot_layout(tag_level = 'new')
s4 + plot_annotation(tag_levels = c('A', '1'))
ggsave(here("figures/s4.png"), width = 16, height=10)
Calculate the counterfactual per ward
ward_pulmonary_counterf <- calculate_counterfactual(model_data = mdata, model=m_pulmonary,
population_denominator = population_without_inst_ship,
grouping_var = ward, re_formula=~(1 + y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.Joining with `by = join_by(year, population_without_inst_ship, .draw, ward)`Joining with `by = join_by(.draw, ward)`
ward_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Overall counterfactual per ward
ward_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
Now we will model the extra-pulmonary TB notification rate. Struggling a bit with negative binomial model, so revert to Poisson.
m_extrapulmonary <- brm(
cases ~ 1 + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata_extrapulmonary,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0,1000), class = Intercept) +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(2), class=cor))
Compiling Stan program...
Start sampling
starting worker pid=12801 on localhost:11828 at 13:47:21.663
starting worker pid=12815 on localhost:11828 at 13:47:21.863
starting worker pid=12829 on localhost:11828 at 13:47:22.069
starting worker pid=12843 on localhost:11828 at 13:47:22.283
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000295 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 2.95 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 0.00027 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 2.7 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0.000738 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 7.38 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000241 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 2.41 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 18.538 seconds (Warm-up)
Chain 3: 8.306 seconds (Sampling)
Chain 3: 26.844 seconds (Total)
Chain 3:
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 18.801 seconds (Warm-up)
Chain 2: 8.357 seconds (Sampling)
Chain 2: 27.158 seconds (Total)
Chain 2:
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 18.629 seconds (Warm-up)
Chain 4: 8.394 seconds (Sampling)
Chain 4: 27.023 seconds (Total)
Chain 4:
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 18.367 seconds (Warm-up)
Chain 1: 12.209 seconds (Sampling)
Chain 1: 30.576 seconds (Total)
Chain 1:
summary(m_extrapulmonary)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ 1 + y_num * acf_period + (1 + y_num * acf_period | ward) + offset(log(population_without_inst_ship))
Data: mdata_extrapulmonary (Number of observations: 444)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Group-Level Effects:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.33 0.06 0.23 0.46 1.00 1503 2462
sd(y_num) 0.02 0.01 0.00 0.04 1.00 451 936
sd(acf_periodb.acf) 0.11 0.09 0.00 0.32 1.00 2251 1370
sd(acf_periodc.postMacf) 0.12 0.09 0.01 0.33 1.00 1677 1902
sd(y_num:acf_periodb.acf) 0.01 0.01 0.00 0.04 1.00 1884 1613
sd(y_num:acf_periodc.postMacf) 0.02 0.01 0.00 0.04 1.00 1028 1648
cor(Intercept,y_num) -0.10 0.30 -0.64 0.52 1.00 2231 2364
cor(Intercept,acf_periodb.acf) -0.01 0.32 -0.61 0.60 1.00 4995 2686
cor(y_num,acf_periodb.acf) -0.00 0.33 -0.65 0.62 1.00 3912 2498
cor(Intercept,acf_periodc.postMacf) -0.07 0.32 -0.66 0.58 1.00 3742 2610
cor(y_num,acf_periodc.postMacf) -0.04 0.33 -0.66 0.61 1.00 3320 2566
cor(acf_periodb.acf,acf_periodc.postMacf) 0.02 0.34 -0.60 0.67 1.00 2917 3101
cor(Intercept,y_num:acf_periodb.acf) -0.01 0.33 -0.63 0.61 1.00 4615 2927
cor(y_num,y_num:acf_periodb.acf) -0.02 0.33 -0.63 0.63 1.00 3487 2438
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.08 0.34 -0.71 0.57 1.00 3404 3133
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) 0.02 0.34 -0.63 0.66 1.00 3443 3302
cor(Intercept,y_num:acf_periodc.postMacf) -0.16 0.31 -0.70 0.48 1.00 3286 2619
cor(y_num,y_num:acf_periodc.postMacf) -0.05 0.33 -0.67 0.57 1.00 2923 2777
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.03 0.33 -0.60 0.65 1.00 2242 2945
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.08 0.34 -0.71 0.61 1.00 2626 2612
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.01 0.33 -0.62 0.64 1.00 2469 3425
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -7.93 0.08 -8.08 -7.78 1.00 1464 2154
y_num -0.09 0.01 -0.12 -0.07 1.00 4426 2523
acf_periodb.acf 0.03 0.98 -1.94 1.94 1.00 2670 2671
acf_periodc.postMacf -0.35 0.39 -1.12 0.40 1.00 2594 2610
y_num:acf_periodb.acf -0.02 0.12 -0.26 0.23 1.00 2656 2529
y_num:acf_periodc.postMacf 0.02 0.04 -0.06 0.10 1.00 2524 2686
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 91.55 63.18 26.77 264.23 1.00 3449 2865
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_extrapulmonary)
pp_check(m_extrapulmonary, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise in plot
plot_counterfactual(model_data = overall_scaffold %>% filter(year<=1961), model=m_extrapulmonary,
population_denominator = population_without_inst_ship, outcome=inc_100k_extrapulmonary, re_formula = NA)
ggsave(here("figures/s6.png"), width=10)
Saving 10 x 4.5 in image
Summarise numerically.
overall_change_extrapulmonary <- summarise_change(model_data=overall_scaffold, model=m_extrapulmonary,
population_denominator=population_without_inst_ship, grouping_var=NULL, re_formula = NA)
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
overall_change_extrapulmonary %>%
keep(names(.) %in% tokeep) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
Numbers of extra-pulmonary TB cases averted overall.
overall_ep_counterf <- calculate_counterfactual(model_data = mdata_extrapulmonary, model=m_extrapulmonary,
population_denominator = population_without_inst_ship)
Joining with `by = join_by(year, population_without_inst_ship, .draw)`Joining with `by = join_by(.draw)`
overall_ep_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
Total extrapulmonary TB cases averted between 1958 and 1963
overall_ep_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Ward-level extra-pulmonary estimates in graphical form.
plot_counterfactual(model_data = mdata_extrapulmonary, model=m_extrapulmonary, outcome = inc_100k,
population_denominator = population_without_inst_ship, grouping_var = ward,re_formula =~(y_num*acf_period | ward),
ward)
ggsave(here("figures/s4.png"), width=10, height=12)
Numerical summary.
ward_change_extrapulmonary <- summarise_change(model_data = mdata_extrapulmonary, model = m_extrapulmonary,
population_denominator = population_without_inst_ship, grouping_var=ward,
re_formula = ~(y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw', 'acf_period'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
ward_change_extrapulmonary %>%
keep(names(.) %in% tokeep) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
NA
Fit the model
(Not rewritten the functions for this yet)
mdata_age_sex <- cases_by_age_sex %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
mutate(year2 = year+0.5) %>%
group_by(age, sex) %>%
mutate(y_num = row_number()) %>%
ungroup()
m_age_sex <- brm(
cases ~ y_num + (acf_period)*(age*sex) + (acf_period:y_num)*(age*sex),
data = mdata_age_sex,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0,1), class = Intercept) +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b))
Compiling Stan program...
Start sampling
starting worker pid=13006 on localhost:11828 at 13:49:46.708
starting worker pid=13020 on localhost:11828 at 13:49:46.924
starting worker pid=13034 on localhost:11828 at 13:49:47.120
starting worker pid=13048 on localhost:11828 at 13:49:47.308
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 6.1e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.61 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 6.1e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.61 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 5.6e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.56 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000141 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.41 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 14.272 seconds (Warm-up)
Chain 3: 16.416 seconds (Sampling)
Chain 3: 30.688 seconds (Total)
Chain 3:
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 15.326 seconds (Warm-up)
Chain 2: 17.167 seconds (Sampling)
Chain 2: 32.493 seconds (Total)
Chain 2:
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 14.691 seconds (Warm-up)
Chain 1: 18.176 seconds (Sampling)
Chain 1: 32.867 seconds (Total)
Chain 1:
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 15.351 seconds (Warm-up)
Chain 4: 17.852 seconds (Sampling)
Chain 4: 33.203 seconds (Total)
Chain 4:
summary(m_age_sex)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ y_num + (acf_period) * (age * sex) + (acf_period:y_num) * (age * sex)
Data: mdata_age_sex (Number of observations: 224)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 4.43 0.11 4.21 4.65 1.00 1202 2133
y_num -0.17 0.03 -0.23 -0.11 1.00 1181 2021
acf_periodb.acf -0.02 1.01 -1.96 1.93 1.00 5991 3250
acf_periodc.postMacf -0.48 0.33 -1.13 0.14 1.00 2320 2790
age06_15 0.62 0.15 0.33 0.93 1.00 1767 2657
age16_25 1.85 0.13 1.58 2.11 1.00 1470 2805
age26_35 1.12 0.14 0.85 1.39 1.00 1487 2364
age36_45 0.27 0.15 -0.04 0.56 1.00 1681 2047
age46_55 -0.63 0.17 -0.98 -0.30 1.00 1884 2918
age56_65 -1.08 0.19 -1.46 -0.70 1.00 2432 2970
age65P -1.65 0.22 -2.06 -1.23 1.00 2780 3021
sexM 0.15 0.15 -0.15 0.45 1.00 1076 1754
age06_15:sexM -0.41 0.21 -0.83 -0.01 1.00 1539 2699
age16_25:sexM -0.56 0.18 -0.92 -0.20 1.00 1336 2185
age26_35:sexM -0.32 0.19 -0.69 0.05 1.00 1463 2338
age36_45:sexM 0.25 0.20 -0.14 0.66 1.00 1623 2124
age46_55:sexM 1.16 0.22 0.73 1.59 1.00 1728 2125
age56_65:sexM 1.14 0.24 0.67 1.62 1.00 2042 2298
age65P:sexM 1.02 0.26 0.50 1.54 1.00 2229 2955
acf_periodb.acf:age06_15 0.04 0.97 -1.83 1.98 1.00 7161 3192
acf_periodc.postMacf:age06_15 -0.61 0.52 -1.62 0.44 1.00 3634 3393
acf_periodb.acf:age16_25 0.03 1.00 -1.94 2.01 1.00 7692 2619
acf_periodc.postMacf:age16_25 0.72 0.42 -0.09 1.56 1.00 3055 3053
acf_periodb.acf:age26_35 0.03 1.03 -1.97 1.99 1.00 7987 2727
acf_periodc.postMacf:age26_35 0.65 0.43 -0.20 1.51 1.00 3000 2989
acf_periodb.acf:age36_45 0.06 1.00 -1.91 2.12 1.01 7678 3152
acf_periodc.postMacf:age36_45 0.74 0.46 -0.18 1.65 1.00 3439 3041
acf_periodb.acf:age46_55 0.06 0.98 -1.84 1.99 1.00 7877 2591
acf_periodc.postMacf:age46_55 0.84 0.48 -0.09 1.80 1.00 3588 3242
acf_periodb.acf:age56_65 0.05 0.99 -1.99 2.00 1.00 7601 2783
acf_periodc.postMacf:age56_65 0.61 0.52 -0.42 1.63 1.00 3588 2823
acf_periodb.acf:age65P 0.06 0.98 -1.92 1.97 1.00 6551 3056
acf_periodc.postMacf:age65P 0.95 0.53 -0.09 2.01 1.00 3685 3135
acf_periodb.acf:sexM -0.00 0.99 -1.95 1.88 1.00 7239 3015
acf_periodc.postMacf:sexM -0.07 0.38 -0.82 0.67 1.00 2234 2809
y_num:acf_periodb.acf -0.10 0.13 -0.35 0.16 1.00 5968 3007
y_num:acf_periodc.postMacf 0.04 0.04 -0.03 0.12 1.00 1947 2248
acf_periodb.acf:age06_15:sexM 0.00 0.98 -1.98 1.90 1.00 8260 3245
acf_periodc.postMacf:age06_15:sexM -0.56 0.65 -1.84 0.72 1.00 4807 3353
acf_periodb.acf:age16_25:sexM 0.01 0.96 -1.84 1.92 1.00 6900 3115
acf_periodc.postMacf:age16_25:sexM 0.65 0.53 -0.42 1.68 1.00 4022 3462
acf_periodb.acf:age26_35:sexM 0.02 0.97 -1.85 1.94 1.00 8452 3036
acf_periodc.postMacf:age26_35:sexM 0.40 0.53 -0.68 1.45 1.00 3126 2740
acf_periodb.acf:age36_45:sexM -0.01 0.99 -1.93 1.96 1.00 6626 3024
acf_periodc.postMacf:age36_45:sexM 0.11 0.55 -1.00 1.14 1.00 3777 3023
acf_periodb.acf:age46_55:sexM -0.00 0.99 -1.93 1.94 1.00 7270 3024
acf_periodc.postMacf:age46_55:sexM 0.67 0.56 -0.41 1.80 1.00 3708 3132
acf_periodb.acf:age56_65:sexM 0.00 1.00 -1.97 1.94 1.00 6891 3364
acf_periodc.postMacf:age56_65:sexM 0.37 0.59 -0.77 1.53 1.00 4009 2948
acf_periodb.acf:age65P:sexM 0.00 0.98 -1.88 1.90 1.00 8469 2945
acf_periodc.postMacf:age65P:sexM 0.29 0.60 -0.89 1.47 1.00 4122 3353
y_num:acf_perioda.preMacf:age06_15 0.02 0.04 -0.05 0.10 1.00 1562 2338
y_num:acf_periodb.acf:age06_15 0.15 0.13 -0.11 0.39 1.00 7055 3095
y_num:acf_periodc.postMacf:age06_15 0.08 0.05 -0.01 0.17 1.00 3210 3231
y_num:acf_perioda.preMacf:age16_25 0.13 0.03 0.06 0.19 1.00 1326 1893
y_num:acf_periodb.acf:age16_25 0.25 0.13 -0.01 0.51 1.00 7194 3014
y_num:acf_periodc.postMacf:age16_25 -0.04 0.04 -0.12 0.04 1.00 2497 3173
y_num:acf_perioda.preMacf:age26_35 0.15 0.03 0.08 0.22 1.00 1387 2037
y_num:acf_periodb.acf:age26_35 0.31 0.14 0.05 0.58 1.00 7412 2758
y_num:acf_periodc.postMacf:age26_35 0.02 0.04 -0.06 0.10 1.00 2474 3219
y_num:acf_perioda.preMacf:age36_45 0.17 0.04 0.10 0.25 1.00 1486 2320
y_num:acf_periodb.acf:age36_45 0.40 0.13 0.13 0.66 1.00 7379 3168
y_num:acf_periodc.postMacf:age36_45 0.06 0.04 -0.02 0.14 1.00 2873 2942
y_num:acf_perioda.preMacf:age46_55 0.20 0.04 0.11 0.28 1.00 1704 2513
y_num:acf_periodb.acf:age46_55 0.44 0.13 0.18 0.70 1.00 6789 2642
y_num:acf_periodc.postMacf:age46_55 0.10 0.04 0.01 0.18 1.00 3029 3481
y_num:acf_perioda.preMacf:age56_65 0.18 0.05 0.08 0.27 1.00 2054 2885
y_num:acf_periodb.acf:age56_65 0.39 0.13 0.13 0.67 1.00 6479 3045
y_num:acf_periodc.postMacf:age56_65 0.11 0.05 0.02 0.20 1.00 2867 3111
y_num:acf_perioda.preMacf:age65P 0.23 0.05 0.13 0.33 1.00 2476 2795
y_num:acf_periodb.acf:age65P 0.43 0.13 0.17 0.69 1.00 5858 3205
y_num:acf_periodc.postMacf:age65P 0.11 0.05 0.02 0.21 1.00 3223 3331
y_num:acf_perioda.preMacf:sexM 0.02 0.04 -0.06 0.10 1.00 1060 1539
y_num:acf_periodb.acf:sexM -0.03 0.14 -0.31 0.23 1.00 6443 3067
y_num:acf_periodc.postMacf:sexM -0.01 0.04 -0.09 0.06 1.00 2046 2564
y_num:acf_perioda.preMacf:age06_15:sexM -0.00 0.05 -0.10 0.10 1.00 1500 2633
y_num:acf_periodb.acf:age06_15:sexM 0.04 0.14 -0.23 0.33 1.00 6732 2680
y_num:acf_periodc.postMacf:age06_15:sexM 0.10 0.06 -0.02 0.21 1.00 4164 3286
y_num:acf_perioda.preMacf:age16_25:sexM -0.01 0.05 -0.09 0.09 1.00 1237 2170
y_num:acf_periodb.acf:age16_25:sexM 0.06 0.14 -0.22 0.33 1.00 5883 2903
y_num:acf_periodc.postMacf:age16_25:sexM -0.01 0.05 -0.11 0.09 1.00 3107 3337
y_num:acf_perioda.preMacf:age26_35:sexM -0.01 0.05 -0.11 0.08 1.00 1357 1998
y_num:acf_periodb.acf:age26_35:sexM 0.05 0.14 -0.22 0.32 1.00 6502 2958
y_num:acf_periodc.postMacf:age26_35:sexM -0.01 0.05 -0.10 0.09 1.00 2654 3020
y_num:acf_perioda.preMacf:age36_45:sexM -0.02 0.05 -0.12 0.08 1.00 1416 2091
y_num:acf_periodb.acf:age36_45:sexM 0.00 0.14 -0.27 0.27 1.00 5693 2683
y_num:acf_periodc.postMacf:age36_45:sexM -0.00 0.05 -0.10 0.10 1.00 3041 2778
y_num:acf_perioda.preMacf:age46_55:sexM -0.01 0.05 -0.12 0.09 1.00 1537 2282
y_num:acf_periodb.acf:age46_55:sexM -0.00 0.14 -0.27 0.27 1.00 6286 3013
y_num:acf_periodc.postMacf:age46_55:sexM -0.07 0.05 -0.17 0.03 1.00 3143 2800
y_num:acf_perioda.preMacf:age56_65:sexM 0.05 0.06 -0.06 0.16 1.00 1758 2174
y_num:acf_periodb.acf:age56_65:sexM 0.07 0.14 -0.21 0.35 1.00 6312 3291
y_num:acf_periodc.postMacf:age56_65:sexM 0.01 0.05 -0.09 0.12 1.00 3364 2507
y_num:acf_perioda.preMacf:age65P:sexM -0.00 0.06 -0.12 0.12 1.00 2029 2775
y_num:acf_periodb.acf:age65P:sexM 0.08 0.14 -0.19 0.35 1.00 6610 2481
y_num:acf_periodc.postMacf:age65P:sexM 0.01 0.06 -0.10 0.12 1.00 3533 3170
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 198.71 63.50 104.03 349.06 1.00 2336 2371
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_age_sex)
pp_check(m_age_sex, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
conditional_effects(m_age_sex)
Summarise posterior
#posterior draws, and summarise
age_sex_summary <- mdata_age_sex %>%
select(year, year2, y_num, acf_period, age, sex) %>%
add_epred_draws(m_age_sex) %>%
group_by(year2, acf_period, age, sex) %>%
mean_qi() %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
age_sex_counterfact <-
tibble(year = mdata_age_sex$year,
year2 = mdata_age_sex$year2,
y_num = mdata_age_sex$y_num,
age = mdata_age_sex$age,
sex = mdata_age_sex$sex,
acf_period = factor("a. pre-acf")) %>%
add_epred_draws(m_age_sex) %>%
group_by(year2, acf_period, age, sex) %>%
mean_qi() %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention")) %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female"))
age_sex_summary %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) %>%
ggplot() +
geom_ribbon(aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = age_sex_counterfact %>% filter(year>=1956),
aes(ymin=.epred.lower, ymax=.epred.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = age_sex_counterfact %>% filter(year>=1956),
aes(y=.epred, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = mdata_age_sex %>%
ungroup() %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(sex = case_when(sex== "M" ~ "Male",
sex== "F" ~ "Female")) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="b. acf" ~ "Counterfactual",
acf_period=="c. post-acf" ~ "Post Intervention")),
aes(y=cases, x=year2, shape=acf_period), size=2) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
ggh4x::facet_grid2(age~sex, scales = "free_y", independent = "y") +
theme_ggdist() +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="") +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="") +
scale_shape(name="") +
labs(
x = "Year",
y = "Case notifications (n)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA),
title = element_text(size=14),
axis.text = element_text(size=14),
legend.text = element_text(size=12))
ggsave(here("figures/s7.png"), height=10)
Saving 7.29 x 10 in image
Calculate summary effects
slope_draws_age_sex <- add_epred_draws(newdata = out_age_sex_3,
object = m_age_sex) %>%
arrange(y_num) %>%
ungroup() %>%
group_by(.draw, y_num, age, sex) %>%
summarise(slope = last(.epred)/first(.epred)) %>%
ungroup() %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(slope)/first(slope)) %>%
mutate(measure = "RR.slope")
`summarise()` has grouped output by '.draw', 'y_num', 'age'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
Numerical summary of these summary results
As a figure
peak_g_age_sex <- peak_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
#level plot
level_g_age_sex <- level_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
#slope plot
slope_g_age_sex <- slope_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
counterfact_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(year, age, sex, .draw, .epred_counterf = .epred)
Adding missing grouping variables: `year2`, `y_num`, `acf_period`, `.row`
#Calcuate predicted number of cases per draw, then summarise.
post_change_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, age, sex, .draw, .epred)
#for the overall period
counterfact_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(age, sex, .draw) %>%
summarise(.epred_counterf = sum(.epred)) %>%
mutate(year = "Overall (1958-1963)")
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row``summarise()` has grouped output by 'age', 'sex'. You can override using the `.groups` argument.
#Calcuate incidence per draw, then summarise.
post_change_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(.draw, age, sex) %>%
summarise(.epred = sum(.epred))
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row``summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
counter_post_overall_age_sex <-
left_join(counterfact_overall_age_sex, post_change_overall_age_sex) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by(age, sex) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup() %>%
mutate(year = "Overall (1958-1963)")
Joining with `by = join_by(age, sex, .draw)`
age_sex_txt <- counter_post_overall_age_sex %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
transmute(year = as.character(year),
sex = sex,
age = age,
cases_averted = glue::glue("{cases_averted}\n({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change}\n({pct_change.lower} to {pct_change.upper})"))
age_sex_txt %>% datatable()
NA
NA
counterfactual_g_age_sex <- counter_post_overall_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_pointrange(aes(x = age, y=cases_averted, ymin=cases_averted.lower, ymax=cases_averted.upper, colour=sex, shape=sex), position=position_dodge(width=0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
scale_y_continuous(labels = comma) +
labs(x="",
y="Number (95% UI)",
colour="") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "bottom")
counterfactual_g_age_sex
Join together for Figure 3.
(peak_g_age_sex + level_g_age_sex) / (slope_g_age_sex + counterfactual_g_age_sex) + plot_annotation(tag_levels = "A") + plot_layout(guides = "collect") & theme(legend.position = "bottom")
ggsave(here("figures/f3.png"), width = 12, height=8)
NA
NA